# Judicial Reform and Sentencing Disparities (Y is theil index)
libraries <- c("knitr", "kableExtra", "dplyr", "modelsummary")
for (library_name in libraries) {
  library(library_name, character.only = TRUE)
}
setwd("~/Documents/Journal articles/identification")  
rm(list = ls())
set.seed(123)
n <- 1000000
ep1 <- rnorm(n, mean = 0, sd = 1)
ep2 <- rnorm(n, mean = 0, sd = 1)
ep3 <- rnorm(n, mean = 0, sd = 1)
ep4 <- rnorm(n, mean = 0, sd = 1)
ep5 <- rnorm(n, mean = 0, sd = 1)
U <- pmin(pmax(rpois(n, lambda = 2.3), 1), 5) # judicial biases (likert-type)
W <- pmin(pmax(rpois(n, lambda = 3.3), 1), 5) #political pressure and influence (likert-type)
prob_M <- pmin(pmax(0.05 + 0.02 * W + ep1, 0), 1)
M <- rbinom(n, 1, prob = prob_M) # mandatory sentencing guidelines (binary)
Z <- pmin(pmax(rpois(n, lambda = exp(-0.5 * M + 0.8 * W + ep2)), 1), 3) # judicial discretion
P <- pmin(pmax(rpois(n, lambda = exp(-0.3 *Z - 0.4 * U + ep3)), 1), 3) # public perception of judicial fairness
prob_J <- pmin(pmax(0.1 + 0.01 * P + 0.05 * W + ep4, 0), 1)
J <- rbinom(n, 1, prob = prob_J) # judicial review panel (binary)
sentencing <- pmin(pmax(rpois(n, lambda = exp(-0.9*J - 0.8*M + 0.8 * U +ep5)), 6), 1500) # sentencing (ranges between 6 to 1500 months)
theil_index <- function(x) {
  x <- x[x > 0]
  mean_x <- mean(x)
  sum((x / mean_x) * log(x / mean_x)) / length(x)
}
# Number of observations in each jurisdiction
chunk_size <- 1000
# Number of jurisdictions
num_chunks <- n / chunk_size

# Initialize a dataframe to store Theil indices
theil_df <- data.frame(
  Chunk = integer(num_chunks),
  Y = numeric(num_chunks)
)
# Compute Theil index for each chunk
for (i in 1:num_chunks) {
  start_idx <- (i - 1) * chunk_size + 1
  end_idx <- i * chunk_size
  
  chunk <- sentencing[start_idx:end_idx]
  
  # Compute the Theil index for the chunk
  theil_df$Chunk[i] <- i
  theil_df$Y[i] <- theil_index(chunk)
}
n <- 1000
set.seed(123)
eps1 <- rnorm(n, mean = 0, sd = 1)
eps2 <- rnorm(n, mean = 0, sd = 1)
eps3 <- rnorm(n, mean = 0, sd = 1)
eps4 <- rnorm(n, mean = 0, sd = 1)
eps5 <- rnorm(n, mean = 0, sd = 1)
U <- pmin(pmax(rpois(n, lambda = 2.3), 1), 5) # judicial biases (likert-type))
W <- pmin(pmax(rpois(n, lambda = 3.3), 1), 5) #political pressure and influence (likert-type))
prob_M <- pmin(pmax(0.05 + 0.02 * W + eps1, 0), 1)
M <- rbinom(n, 1, prob = prob_M) # mandatory sentencing guidelines (binary)
Z <- pmin(pmax(rpois(n, lambda = exp(-0.5 * M + 0.8 * W + eps2)), 1), 3) # judicial discretion
P <- pmin(pmax(rpois(n, lambda = exp(-0.3 *Z - 0.4 * U + eps3)), 1), 3) # public perception of judicial fairness
prob_J <- pmin(pmax(0.1 + 0.01 * P + 0.05 * W + eps4, 0), 1)
J <- rbinom(n, 1, prob = prob_J) # judicial review panel (binary)
df_nodes <- data.frame(M, Z, P, J, U, W)
df <- cbind(theil_df, df_nodes)

# Descriptive Statistics
summary(df)
summary <- df[, c("Y","M", "Z", "P", "J", "U", "W")] %>% 
  select(`Sentencing disparities` = Y, `Mandatory sentencing guidelines` = M, `Judicial discretion` = Z, `Public perception of judicial fairness` = P, `Judicial review panel` = J, `Judicial bias` = U, `Political pressure` = W)
summary_list <- lapply (summary, scale)
emptycol = function(x) " "
datasummary(`Sentencing disparities` + `Mandatory sentencing guidelines` + `Judicial discretion` +`Public perception of judicial fairness` +`Judicial review panel` + `Judicial bias` + `Political pressure` ~  Mean + SD + Min + Max + Heading("Boxplot") * emptycol + Heading("Histogram") * emptycol, data=summary, fmt = "%.3f") %>% column_spec(column = 6, image = spec_boxplot(summary_list)) %>% column_spec(column = 7, image =spec_hist(summary_list))

# Conditional expectation of Y given M = 1 and J = 1
E_Y_M1_J1 <- df %>% filter(M == 1 & J == 1) %>% group_by(Z, P) %>% summarise(E_Y_given_M1_J1 = mean(Y), .groups = 'drop') 
# Conditional expectation of Y given M = 0 and J = 0
E_Y_M0_J0 <- df %>% filter(M == 0 & J == 0) %>% group_by(Z, P) %>% summarise(E_Y_given_M0_J0 = mean(Y), .groups = 'drop')
# Marginal probabilities of (P, Z) pairs
Pr_P_Z <- df %>% group_by(Z, P) %>% summarise(Pr_P_Z = n()/nrow(df), .groups = 'drop')
# Join the conditional expectations with the marginal probabilities
E_Y_M1_J1_final <- merge(E_Y_M1_J1, Pr_P_Z, by = c("Z", "P"))
E_Y_M0_J0_final <- merge(E_Y_M0_J0, Pr_P_Z, by = c("Z", "P"))
# Calculate the final sums
E_Y_M1_J1_summed <- sum(E_Y_M1_J1_final$E_Y_given_M1_J1 * E_Y_M1_J1_final$Pr_P_Z)
E_Y_M0_J0_summed <- sum(E_Y_M0_J0_final$E_Y_given_M0_J0 * E_Y_M0_J0_final$Pr_P_Z)
# Experimental Distribution
E_Y_M1_J1_summed
E_Y_M0_J0_summed
# Observational distribution (Conditional on M=1, J=1 and  M=0, J=0)
obs_Y_M1_J1 <- mean(df$Y[M == 1 & J == 1])
obs_Y_M0_J0 <- mean(df$Y[M == 0 & J == 0])
obs_Y_M1_J1
obs_Y_M0_J0
# Causal effect
causal <- (E_Y_M1_J1_summed-E_Y_M0_J0_summed)/E_Y_M0_J0_summed*100
causal_abs <- E_Y_M1_J1_summed-E_Y_M0_J0_summed
# Comparison by treatment status
comparison_treat <- (obs_Y_M1_J1-obs_Y_M0_J0)/obs_Y_M0_J0*100
comparison_treat_abs <- obs_Y_M1_J1-obs_Y_M0_J0
causal_abs
comparison_treat_abs
causal
comparison_treat

